#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Bootstrap: run the project's path setup and shared helper library directly in
# this module's namespace, so every name they define becomes a global here.
# NOTE(review): init_path.py is presumably what defines P_Lib (and the other
# P_* path objects used further down), and GasStation.py presumably supplies
# read_hdf, concat, Pool and load_obj -- confirm against those files.
# SECURITY: exec() runs whatever these files contain; only use this script
# with a trusted checkout.
with open('init_path.py') as _init_src:  # context manager closes the handle
    exec(_init_src.read())
exec((P_Lib/'GasStation.py').read_text())
# IPython-only: get_ipython() is not defined under a plain `python` interpreter.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:


# fuel = 'e10'
# fuel = 'e5'
# fuel = 'diesel'
# Inclusive hour-of-day bounds applied by PCCount_all_fuel_types_one_day.
h_min, h_max = 7, 21
# Price columns for which per-fuel price-change counts are produced.
fuels = ['e5', 'e10', 'diesel']

# ### Number of Price Changes Per Day

# In[3]:


# Collect the per-day HDF5 files in name order and report how many exist.
files = sorted((P_GS_Data_Raw / 'PH_Day').glob('*.h5'))
print(len(files))
# Keep only the days whose file stem appears in the stored weekday /
# non-holiday list, then report how many files survive the filter.
ls_strYMD_weekday_nonholiday = load_obj(P_GS_Data / 'GS' / 'ls_strYMD_weekday_nonholiday.pkl')
files = [day_file for day_file in files if day_file.stem in ls_strYMD_weekday_nonholiday]
print(len(files))


# #### One Type of Fuel

# In[4]:
# #### One Type of Fuel

def PCCount_one_fuel_type(df, fuel_type):
    """Count price changes per station for one fuel within one day's frame.

    Rows whose ``fuel_type`` price is not positive are dropped first. A
    remaining row counts as a change when its price differs from the previous
    remaining row of the same station; a station's first remaining row always
    counts because it has no predecessor to compare against.

    Args:
        df: Frame with a ``StID`` column and a ``fuel_type`` price column.
        fuel_type: Name of the price column to analyse.

    Returns:
        Series indexed by ``StID`` with the number of changes. Stations that
        have no positive price at all are absent from the result.
    """
    priced = df[df[fuel_type] > 0]
    previous = priced.groupby('StID')[fuel_type].shift(1)
    changed = priced[priced[fuel_type] != previous]
    return changed.groupby('StID')[fuel_type].count()


def PCCount_one_fuel_type_one_day(f, fuel_type=None):
    """Load one day's HDF5 file and count price changes per station.

    NOTE(review): the output file name below advertises "7to21", but no
    hour-of-day filter is applied here (it was commented out in the original
    notebook cell). Confirm which of the two is intended.

    Args:
        f: Path of the day file; its stem becomes the name of the result.
        fuel_type: Price column to analyse. When omitted, the module-level
            loop variable ``fuel`` is used, as the one-argument form did.

    Returns:
        Series of change counts indexed by ``StID`` and named after the day.
    """
    if fuel_type is None:
        fuel_type = fuel
    df_day = PCCount_one_fuel_type(read_hdf(f, 'GS'), fuel_type)
    df_day.name = f.stem
    return df_day


# #### All Fuel Types

def PCCount_all_fuel_types_one_day(f):
    """Count rows per station between ``h_min`` and ``h_max`` for one day file.

    Every row whose ``Time`` hour lies in the inclusive window is counted,
    regardless of which fuel price it carries.

    Args:
        f: Path of the day file; its stem becomes the name of the result.

    Returns:
        Series of row counts indexed by ``StID`` and named after the day.
    """
    df = read_hdf(f, 'GS')
    df['H'] = df.Time.dt.hour
    in_window = df[(df.H >= h_min) & (df.H <= h_max)]
    df_day = in_window.groupby('StID').H.count()
    df_day.name = f.stem
    return df_day


def PCCount_days_to_long(daily_counts):
    """Combine per-day count Series into one long (StID, YMD, PCCount) frame.

    Args:
        daily_counts: Non-empty list of Series indexed by ``StID``, each named
            with its day string.

    Returns:
        DataFrame with integer columns ``StID``-keyed ``YMD`` and ``PCCount``,
        sorted by station and day. Station/day pairs without a count are
        omitted.
    """
    wide = concat(daily_counts, axis=1)
    # Explicit dropna: the integer cast below must never see the gaps that the
    # outer alignment creates, whether or not stack() drops them itself.
    long = wide.stack().dropna().reset_index()
    long.columns = ['StID', 'YMD', 'PCCount']
    long = long.sort_values(['StID', 'YMD'])
    long['PCCount'] = long.PCCount.astype(int)
    long['YMD'] = long.YMD.astype(int)
    return long


# The guard keeps the worker pool from being launched when this module is
# imported rather than run (multiprocessing workers may re-import it).
if __name__ == '__main__':
    for fuel in fuels:
        # The fuel is handed to the workers explicitly instead of being read
        # from a global that each worker process would have to share.
        with Pool(4) as p:
            ls = p.starmap(PCCount_one_fuel_type_one_day,
                           [(f, fuel) for f in files])

        # PCCount_all_fuel_types_one_day is deliberately not mapped here:
        # storing its result in `ls` would replace the per-fuel counts that
        # the per-fuel output files below are named after.
        df = PCCount_days_to_long(ls)

        fname = 'daily_weekday_nonholiday-7to21-positive_price_only_' + fuel
        print(fname)
        df.to_hdf(P_GS_Data / 'PCCount' / (fname+'.h5'), key='GS', mode='w', complevel=9, complib='blosc')
        df.to_stata(P_GS_Data / 'PCCount' / (fname+'.dta'))
# In[ ]:




# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:






# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:




